library(geometry)
library(shape)


library('Morpho')
library(geomorph)

a=read.table("GMM-TM.txt", header=T)

specimens <- unique(a$spc)
list_df<- list()


# create a loop for each element in specimens
for (i in specimens) {
  carnivore <- unique(subset(a, spc==i,select=c(carnivore)))
  specimen <- subset(a, spc==i,select=c(x, y, points))
  #plot the specimen using small black filled points without axis or axis labels
  plot(specimen$x, specimen$y, pch = 19, col = "black", xlab = "", ylab = "", axes = FALSE)
  
  
  
  # Compute pairwise distances
  distances <- as.matrix(dist(specimen))  # Creates a matrix of pairwise distances
  
  
  max_indices <- which(distances == max(distances), arr.ind = TRUE) # Find the indices of the maximum distance
  
  LMs_df <- as.data.frame(matrix(c(specimen[max_indices[1, ][1], ]$x, 
                                   specimen[max_indices[1, ][1], ]$y,
                                   specimen[max_indices[1, ][2], ]$x, 
                                   specimen[max_indices[1, ][2], ]$y),
                                 nrow = 2, ncol = 2, byrow = TRUE))
  
  names(LMs_df) <- c("x", "y")
  
  LMs_df$label <- c("LM1", "LM2")
  
  # Calculating the Line Segment Between LM1 and LM2 ---------------------------
  # Calculate the slope of the line segment
  slope <- (LMs_df$y[2] - LMs_df$y[1]) / (LMs_df$x[2] - LMs_df$x[1])
  
  # Calculate the y-intercept of the line segment
  
  y_intercept <- LMs_df$y[1] - slope * LMs_df$x[1]
  #Add line segment to the plot
  abline(a = y_intercept, b = slope, col = "red")
  
  # Calculating Points Along the Segment at Specific Percentages ---------------
  percentages <- seq(0.1, 0.9, by = 0.1)
  
  # Calculate the x and y coordinates of the points along the line segment
  inter_points  <- data.frame(x = LMs_df$x[1] + (LMs_df$x[2] - LMs_df$x[1]) * percentages,
                              y = LMs_df$y[1] + (LMs_df$y[2] - LMs_df$y[1]) * percentages)
  names(inter_points) <- c("x", "y")
  #Add the points to the plot
  points(inter_points$x, inter_points$y, pch = 3, col = "green")
  
  # Drawing Perpendicular Lines and Finding Intersection Points with the outline stored in specimen ---------------
  intersections <- data.frame(x = numeric(), y = numeric(), label = character())
  for (i in 1:(nrow(specimen) - 1)) {
    if (specimen[i,]$x == specimen[i + 1,]$x) {
      # Vertical line case
      for (k in 1:nrow(inter_points)) {
        x_intersect <- specimen[i,]$x
        y_intersect <- inter_points[k,]$y
        if (y_intersect >= range(specimen[i,]$y, specimen[i + 1,]$y)[1] & y_intersect <= range(specimen[i,]$y, specimen[i + 1,]$y)[2]) {
          label_intersect <- paste0("LM", k * 10, ifelse(y_intersect > inter_points[k,]$y, "A", "B"))
          intersections <- rbind(intersections, data.frame(x = x_intersect, y = y_intersect, label = label_intersect))
        }
      }
    } else {
      # Non-vertical line case
      segment_slope <- (specimen[i + 1,]$y - specimen[i,]$y) / (specimen[i + 1,]$x - specimen[i,]$x)
      segment_intercept <- specimen[i,]$y - segment_slope * specimen[i,]$x
      perp_slope <- -1 / slope
      for (k in 1:nrow(inter_points)) {
        perp_intercept <- inter_points[k,]$y - perp_slope * inter_points[k,]$x
        x_intersect <- (segment_intercept - perp_intercept) / (perp_slope - segment_slope)
        y_intersect <- perp_slope * x_intersect + perp_intercept
        if (x_intersect >= range(specimen[i,]$x, specimen[i + 1,]$x)[1] & x_intersect <= range(specimen[i,]$x, specimen[i + 1,]$x)[2]) {
          label_intersect <- paste0("LM", k * 10, ifelse(y_intersect > inter_points[k,]$y, "A", "B"))
          intersections <- rbind(intersections, data.frame(x = x_intersect, y = y_intersect, label = label_intersect))
        }
      }
    }
  }
  
  # Add the intersection points to LMs_df
  LMs_df <- rbind(LMs_df, intersections)
  
  # Add the intersection points to the plot
  points(LMs_df$x, LMs_df$y, pch = 8, col = "purple")
  
  # Add the labels to the intersection points
  text(LMs_df$x, LMs_df$y, labels = LMs_df$label, pos = 3, col = "blue")
  
  #Add legend to the plot
  legend("topright", legend = c("Shape Outline", "Line Segment", "Points Along Segment", "SemiLandmarks"), 
         col = c("black", "red", "green", "purple"), pch = c(19, NA, 3, 8), lty = c(NA, 1, NA, NA))
  
  
  
  
  newlist_df  <- list(LMs_df)
  newlist_df <- setNames(list(LMs_df), as.character(carnivore)) 
  list_df = c(list_df, newlist_df) 
}

# Check the dfs with more, or less than 20 rows. Print number of rows, index and name of the df. 
#Add them to a counter, including the name of different names
counter = 0
names = list()

for (i in 1:length(list_df)){
  if (nrow(list_df[[i]]) != 20){
    print(paste("Name:", names(list_df[i])))
    print(paste("Index:", i))
    print(paste("Number of rows:", nrow(list_df[[i]])))
    
    counter = counter + 1
    names = c(names, names(list_df[i]))
  }
}
print(paste("Number of dfs with more or less than 20 rows:", counter))
print(paste("Total number of dfs:", length(list_df)))
#print the number of occurrences of each name
names <- as.factor(unlist(names))
print(table(names))

#Repeat same logic, but separating the dfs with more than 20 rows and less than 20 rows
counterPlus20 = 0
counterLess20 = 0
counter = 0
namesPlus20 = list()
namesLess20 = list()
for (i in 1:length(list_df)){
  if (nrow(list_df[[i]]) > 20){
    print(paste("Name:", names(list_df[i])))
    print(paste("Index:", i))
    print(paste("Number of rows:", nrow(list_df[[i]])))
    
    counter = counter + 1
    counterPlus20 = counterPlus20 + 1
    namesPlus20 = c(namesPlus20, names(list_df[i]))
  }
  if (nrow(list_df[[i]]) < 20){
    print(paste("Name:", names(list_df[i])))
    print(paste("Index:", i))
    print(paste("Number of rows:", nrow(list_df[[i]])))
    
    counter = counter + 1
    counterLess20 = counterLess20 + 1
    namesLess20 = c(namesLess20, names(list_df[i]))
    
  }
}
print(paste("Number of dfs with more than 20 rows:", counterPlus20))
print(paste("Number of dfs with less than 20 rows:", counterLess20))
print(paste("Number of dfs with more or less than 20 rows:", counter))
print(paste("Total number of dfs:", length(list_df)))

print(paste("Number of occurrences of each species with more than 20 rows"))
namesPlus20 <- as.factor(unlist(namesPlus20))
print(table(namesPlus20))

print(paste("Number of occurrences of each species with less than 20 rows"))
namesLess20 <- as.factor(unlist(namesLess20))
print(table(namesLess20))

print(paste("Number of occurrences of each species with more or less than 20 rows"))
names <- as.factor(unlist(names))
print(table(names))

# Print the total number of occurrences of each species in the df, regardless of the number of rows
print(paste("Number of occurrences of each species"))
print(table(as.factor(unlist(names(list_df)))))

# Add the dfs with 20 rows to a new list
list_df_20 = list()
for (i in 1:length(list_df)){
  if (nrow(list_df[[i]]) == 20){
    list_df_20 = c(list_df_20, list_df[i]) 
    
  }
}

# Print the total number of occurrences of each species in the df,with exactly 20 ros
print(paste("Number of occurrences of each species"))
print(table(as.factor(unlist(names(list_df_20)))))


landmark_array <- array(dim = c(20, 2, length(list_df_20)))

# Populate the array with the data from the list of dataframes
for (i in seq_along(list_df_20)) {
  specimen <- list_df_20[[i]]
  landmark_array[,,i] <- as.matrix(specimen[, c("x", "y")])
}

# Extract species names from the names of the list
species <- names(list_df_20)

# Combine into a geomorph data frame
gdf <- geomorph.data.frame(landmarks = landmark_array, species = species)

summary(gdf)

plot(gdf$landmarks[,,1])
plotAllSpecimens(gdf$landmarks, mean=F)
#GPA analysis
GPAcoords<-gpagen(gdf$landmarks)
gdf<-geomorph.data.frame(gdf, coords=GPAcoords$coords, size=GPAcoords$Csize)

plotAllSpecimens(gdf$coords, mean=T)

# Perform PCA on GPA aligned coordinates-----------------------------------------
pca <- gm.prcomp(gdf$coords)

# Summary of PCA results
summary(pca)

# Plot PCA results
plot(pca, pch = 19, col = as.factor(gdf$species))
legend("topright", legend = levels(as.factor(gdf$species)), col = 1:length(levels(as.factor(gdf$species))), pch = 19)


# Perform Procrustes ANOVA --------------------------------------------------------------
anova_result <- procD.lm(coords ~ species, data = gdf)
summary(anova_result)

# Perform pairwise anova by species
pairwise_result <- pairwise(anova_result, groups = species)
summary(pairwise_result)

# Perform Canonical Variate Analysis on GPA coords
cva_result <- CVA(gdf$coords, gdf$species)
summary(cva_result)
plot(cva_result$CVscores, pch = 19, col = as.factor(gdf$species))
legend("topright", legend = levels(as.factor(gdf$species)), col = 1:length(levels(as.factor(gdf$species))), pch = 19)
# Generate a confusion matrix
cva_class <- cva_result$class
confusion_matrix <- table(gdf$species, cva_class)
confusion_matrix
# percentage of correct classification
sum(diag(confusion_matrix)) / sum(confusion_matrix)
# percentage of correct classification per species
diag(confusion_matrix) / colSums(confusion_matrix)
# accuracy, precision, recall, and F1 score
accuracy <- sum(diag(confusion_matrix)) / sum(confusion_matrix)
print(accuracy)
precision <- diag(confusion_matrix) / colSums(confusion_matrix)
print(precision)
recall <- diag(confusion_matrix) / rowSums(confusion_matrix)
print(recall)
f1_score <- 2 * precision * recall / (precision + recall)
print(f1_score)

# Perform Canonical Variate Analysis on PCA results
cva_result <- CVA(pca$x, gdf$species)
summary(cva_result)
plot(cva_result$CVscores, pch = 19, col = as.factor(gdf$species))
legend("topright", legend = levels(as.factor(gdf$species)), col = 1:length(levels(as.factor(gdf$species))), pch = 19)
# Generate a confusion matrix
cva_class <- cva_result$class
confusion_matrix <- table(gdf$species, cva_class)
confusion_matrix
# percentage of correct classification
sum(diag(confusion_matrix)) / sum(confusion_matrix)
# percentage of correct classification per species
diag(confusion_matrix) / colSums(confusion_matrix)
# accuracy, precision, recall, and F1 score
accuracy <- sum(diag(confusion_matrix)) / sum(confusion_matrix)
print(accuracy)
precision <- diag(confusion_matrix) / colSums(confusion_matrix)
print(precision)
recall <- diag(confusion_matrix) / rowSums(confusion_matrix)
print(recall)
f1_score <- 2 * precision * recall / (precision + recall)
print(f1_score)


